* This file produces runs the analysis in Table A5 and outputs to an excel spreadsheet

use "$saveddata/data_complete_withcosts.dta", clear


set more off
* Drop the variables we don't need in this particular run to make it run quicker
keep aeattenddisp mort death30 death90 death365 trust_code cause_of_death bin d1
compress

* Dummy variables for chapters

set more off
foreach var in  I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {
gen `var'=substr(cause_of_death,1,2)=="`var'"
replace `var'=0 if death30==0
}

gen test=0
foreach var in I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {
replace test=1 if `var'==1
}


	preserve	
	set more off
		cap drop diff_death
		
		* Collapse to correct level
		collapse (count) freq = aeattenddisp (mean) I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 , by(bin)

		* Now do analysis...
		egen sumfreq = sum(freq)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		compress


		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/10 {
		gen bin`x' = bin^`x'
		}

	* parameters
	local porder = 10
	local start = 180
	local end = 240

	* waiting times - find end point
	local sumdiff = 999
	while abs(`sumdiff')>0.01 {
		
		cap drop pfreq_cf diff
			
		* display then update end point
		di "End point: `end'"
		local end = `end' + 10
		
		* compute counterfactual
		qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
		
		
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait

	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

	* other outcomes - parameters and inputs to later calculations
	gen freq_diff = freq_cf - freq 
	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
	gen w_pre = freq_cf/sumfreq_pre_cf
	gen w_post_diff = freq_diff/sumfreq_post_diff
	gen w = freq/sumfreq_pre
	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre
	drop *cf

	* other outcomes - compute counterfactuals and selection-only counterfactuals
	foreach var of varlist I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {

		* counterfactual outcomes
		qui reg `var' bin1-bin`porder' if bin<`start' | bin>=260
		
		predict `var'_cf, xb
		
		* non-movers: pre-target counterfactual average outcome 
		qui egen `var'_pre_cf = sum(`var'_cf*w_pre)
			
		* movers: post-target counterfactual average outcome (weighted by mover proportions)
		qui egen `var'_post_cf = sum(`var'_cf*w_post_diff)

		* selection-only counterfactual
		qui gen `var'_selection = nonmover_pre*`var'_pre_cf + (1-nonmover_pre)*`var'_post_cf
		
		* observed outcome (averaged over pre period)
		qui egen `var'_observed = sum(`var'*w)
		}

	format %10.0fc freq* sumfreq*
		
	* build results table
	gen helper = 1
	collapse (mean) nonmover_pre *_pre_cf *_post_cf *_selection *_observed, by(helper)

	foreach i in I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {
		gen `i'_diff = `i'_observed - `i'_selection
		gen `i'_diff_pc = `i'_diff / `i'_selection
		}

	foreach i in pre_cf post_cf selection observed diff diff_pc {
		rename *_`i' `i'_* 
		}
		

	foreach x in I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {
	su diff_`x'
	matrix observe_`x' = r(mean)
	su diff_pc_`x'
	matrix observe_pc_`x' = r(mean)
	}
	

restore

capture program drop myboot

program define myboot, rclass

* data for aggregate analysis
	preserve

	* draw bootstrap sample
	bsample, cluster(trust_code)
	
	cap drop diff_death
	

		
		* Collapse to correct level
		collapse (count) freq = aeattenddisp (mean) I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 , by(bin)

		* Now do analysis...
		egen sumfreq = sum(freq)
		gen pfreq = freq/sumfreq
		drop sumfreq

		lab var pfreq "Proportion of patients"
		lab var bin "Exit time (10 min intervals)"
		compress


		** ANALYSIS HERE
		
		* bin polynomial variables
	forv x = 1/10 {
		gen bin`x' = bin^`x'
		}

	* parameters
	local porder = 10
	local start = 180
	local end = 300

	** waiting times - find end point
	local sumdiff = 999
	local t = 0
	while abs(`sumdiff')>0.01 & `t'<100{
		
		cap drop pfreq_cf diff
		
		* Update end point
		local end = `end' + 10
		
		* compute counterfactual
		qui reg pfreq bin1-bin`porder' if bin<`start' | bin>`end'
		predict pfreq_cf, xb

		* check difference in mass
		gen diff = pfreq - pfreq_cf 
		qui su diff if bin>`start' | bin<`end'
		local sumdiff = r(sum)
			
		* display end point
		di "End point: `end'"
		
		local t = `t'+1
		}
	drop diff

	* waiting times - compute effects
	qui reg freq bin1-bin`porder' if bin<`start' | bin>`end'
	predict freq_cf, xb
	gen wait = freq*bin 

	gen wait_cf = freq_cf*bin 
	gen wait_diff = wait_cf - wait

	su wait_diff
	local saving = r(sum)
	su freq
	local patients = r(sum)/2
	su wait_cf
	local totalwait = r(sum)/2
	su wait_diff if bin>=`start' & bin<=`end'
	local saving_int = r(sum)
	su wait_cf if bin>=`start' & bin<=`end'
	local totalwait_int = r(sum)/2
	su freq if bin>=`start' & bin<=`end'
	local patients_int = r(sum)/2
	local savingpp = round(`saving'/`patients',1)
	local savingpc = round(`saving'/`totalwait',0.01)
	local savingpp_int = round(`saving_int'/`patients_int',1)
	local savingpc_int = round(`saving_int'/`totalwait_int',0.01)
	di "Total waiting time saving (mins per patient): " `savingpp'
	di "Total waiting time saving (% of c.f. total): " `savingpc'
	di "Total waiting time saving in distorted window (mins per patient): " `savingpp_int'
	di "Total waiting time saving in disorted window (% of c.f. total): " `savingpc_int'

	* other outcomes - parameters and inputs to later calculations
	gen freq_diff = freq_cf - freq 
	egen sumfreq_pre_cf = sum(freq_cf) if bin>`start' & bin<=240
	egen sumfreq_post_diff = sum(freq_diff) if bin>240 & bin<=`end'
	egen sumfreq_pre = sum(freq) if bin>`start' & bin<=240
	gen w_pre = freq_cf/sumfreq_pre_cf
	gen w_post_diff = freq_diff/sumfreq_post_diff
	gen w = freq/sumfreq_pre
	gen nonmover_pre = sumfreq_pre_cf/sumfreq_pre
	drop *cf

	* other outcomes - compute counterfactuals and selection-only counterfactuals
	foreach var of varlist I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {

		* counterfactual outcomes
		qui reg `var' bin1-bin`porder' if bin<`start' | bin>=260
		
		predict `var'_cf, xb
		
		* non-movers: pre-target counterfactual average outcome 
		qui egen `var'_pre_cf = sum(`var'_cf*w_pre)
			
		* movers: post-target counterfactual average outcome (weighted by mover proportions)
		qui egen `var'_post_cf = sum(`var'_cf*w_post_diff)

		* selection-only counterfactual
		qui gen `var'_selection = nonmover_pre*`var'_pre_cf + (1-nonmover_pre)*`var'_post_cf
		
		* observed outcome (averaged over pre period)
		qui egen `var'_observed = sum(`var'*w)
		}

	format %10.0fc freq* sumfreq*
		
	* build results table
	gen helper = 1
	collapse (mean) nonmover_pre *_pre_cf *_post_cf *_selection *_observed, by(helper)

	foreach i in I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {
			if `t'<99{
				gen `i'_diff = `i'_observed - `i'_selection
				gen `i'_diff_pc = `i'_diff / `i'_selection
			}
			
			if `t'>98{
				gen `i'_diff = .
				gen `i'_diff_pc = .
			}
				
		}

	foreach i in pre_cf post_cf selection observed diff diff_pc {
		rename *_`i' `i'_* 
		}
		
	keep diff*
	
	foreach x in I2 I6 J1 J4 C3 C2 C1 C5 C6 F0 {
	su diff_`x'
	return scalar d_`x' = r(mean)
	su diff_pc_`x'
	return scalar d_pc_`x' = r(mean)
	/*
	su diff_pc_`x'
	return scalar d_`x' = r(mean)
	*/
	}
	
	*inpat_los revisit30 readmit30 discharge discharge_gp discharge_no
	
	restore

* END PROGRAMME DEFINE HERE	
end	


#delimit ;

* Simulate 199 times to test;
simulate  diff_I2 = r(d_I2) diff_pc_I2 = r(d_pc_I2)
diff_I6 = r(d_I6) diff_pc_I6 = r(d_pc_I6)

diff_J1 = r(d_J1) diff_pc_J1 = r(d_pc_J1) 
diff_J4 = r(d_J4) diff_pc_J4 = r(d_pc_J4) 

diff_C3 = r(d_C3) diff_pc_C3 = r(d_pc_C3)
diff_C2 = r(d_C2) diff_pc_C2 = r(d_pc_C2) 
diff_C1 = r(d_C1) diff_pc_C1 = r(d_pc_C1) 
diff_C5 = r(d_C5) diff_pc_C5 = r(d_pc_C5) 
diff_C6 = r(d_C6) diff_pc_C6 = r(d_pc_C6) 

diff_F0 = r(d_F0) diff_pc_F0 = r(d_pc_F0) 

, reps(199) seed(32786105): myboot;

* Boostrap;
#delimit;
bstat, stat( 	observe_I2, observe_pc_I2, observe_I6, observe_pc_I6,
				observe_J1, observe_pc_J1, observe_J4, observe_pc_J4,
				observe_C3, observe_pc_C3, observe_C2, observe_pc_C2, observe_C1, observe_pc_C1, observe_C5, observe_pc_C5, observe_C6, observe_pc_C6, observe_F0, observe_pc_F0);
 
#delimit cr

* Output results
putexcel set "$results\TableA5.xlsx", replace
putexcel A1 = "Variable"
putexcel B1 = "Coefficient"
putexcel C1 = "Std. error"

matrix b = e(b)'
putexcel A2 = matrix(b), rownames

matrix se = e(se)'
putexcel C2 = matrix(se)
